import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
import random
import keras
import math
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GroupKFold, GroupShuffleSplit
from sklearn import preprocessing
from keras import backend as K
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.decomposition import PCA
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, Activation
from scipy import optimize
import warnings
warnings.filterwarnings("ignore")
%matplotlib inline
# get the FD004 data
def get_data():
dir_path = 'C:/Users/aalah/0_Stage_CESI/turbofan_failure-master/data/'
dependent_var = ['RUL']
index_names = ['Unit', 'Cycle']
setting_names = ['Altitude', 'Mach', 'TRA']
sensor_names = ['T20','T24','T30','T50','P20','P15','P30','Nf','Nc','epr','Ps30','phi','NRf','NRc','BPR','farB','htBleed','Nf_dmd','PCNfR_dmd','W31','W32']
col_names = index_names + setting_names + sensor_names
df_train = pd.read_csv(dir_path+'train_FD004.txt',delim_whitespace=True,names=col_names)
rul_train = pd.DataFrame(df_train.groupby('Unit')['Cycle'].max()).reset_index()
rul_train.columns = ['Unit', 'max']
df_train = df_train.merge(rul_train, on=['Unit'], how='left')
df_train['RUL'] = df_train['max'] - df_train['Cycle']
df_train.drop('max', axis=1, inplace=True)
df_test = pd.read_csv(dir_path+'test_FD004.txt', delim_whitespace=True, names=col_names)
y_test = pd.read_csv(dir_path+'RUL_FD004.txt', delim_whitespace=True,names=["RUL"])
#y_true["Unit"] = y_true.index + 1
return df_train, df_test, y_test
##--------------- data preprocessing --------------------------------##
# add an operating-condition label so the data can later be normalized per operating condition
def add_operational_condition(df):
df_op_cond = df.copy()
df_op_cond['Altitude'] = df_op_cond['Altitude'].round()
df_op_cond['Mach'] = df_op_cond['Mach'].round(decimals=2)
df_op_cond['TRA'] = df_op_cond['TRA'].round()
# converting the settings to strings and concatenating them turns the operating condition into a categorical variable
df_op_cond['op_cond'] = df_op_cond['Altitude'].astype(str) + '_' + \
df_op_cond['Mach'].astype(str) + '_' + \
df_op_cond['TRA'].astype(str)
return df_op_cond
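# For intuition, a minimal sketch (made-up setting values, not real FD004 data) of how
# rounding collapses noisy recordings of the same nominal condition into one label:
_toy_settings = pd.DataFrame({'Altitude': [34.998, 35.003], 'Mach': [0.8401, 0.8399], 'TRA': [100.02, 99.97]})
add_operational_condition(_toy_settings)['op_cond'].tolist()
# ['35.0_0.84_100.0', '35.0_0.84_100.0'] — both rows map to the same operating condition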
# Normalize the data based on the operational condition
def condition_scaler(df_train, df_test, sensor_names):
#Apply operating condition specific scaling
scaler = MinMaxScaler(feature_range = (0, 1))
for condition in df_train['op_cond'].unique():
scaler.fit(df_train.loc[df_train['op_cond']==condition, sensor_names])
df_train.loc[df_train['op_cond']==condition, sensor_names] = scaler.transform(df_train.loc[df_train['op_cond']==condition, sensor_names])
df_test.loc[df_test['op_cond']==condition, sensor_names] = scaler.transform(df_test.loc[df_test['op_cond']==condition, sensor_names])
return df_train, df_test
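# A quick check (toy frames, hypothetical values) that the scaler is fitted per condition on
# the training side only, then applied to the matching test rows:
_toy_tr = pd.DataFrame({'op_cond': ['a', 'a', 'b', 'b'], 's1': [1.0, 3.0, 10.0, 20.0]})
_toy_te = pd.DataFrame({'op_cond': ['a', 'b'], 's1': [2.0, 15.0]})
_toy_tr, _toy_te = condition_scaler(_toy_tr, _toy_te, ['s1'])
_toy_tr['s1'].tolist(), _toy_te['s1'].tolist()
# ([0.0, 1.0, 0.0, 1.0], [0.5, 0.5]) — each test row is scaled with its own condition's min/max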
# denoise the signal with exponential smoothing, controlled by the smoothing factor alpha
def exponential_smoothing(df, sensors, n_samples, alpha):
df = df.copy()
# first, calculate the exponential weighted mean of desired sensors
df[sensors] = df.groupby('Unit')[sensors].apply(lambda x: x.ewm(alpha=alpha).mean())
# second, drop the first n_samples of each unit to reduce the filter's start-up delay
def create_mask(data, samples):
result = np.ones_like(data)
result[0:samples] = 0
return result
mask = df.groupby('Unit')['Unit'].transform(create_mask, samples=n_samples).astype(bool)
df = df[mask]
return df
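# ewm(alpha=a).mean() with pandas' default adjust=True computes the weight-normalized average
# s_t = (x_t + (1-a)*x_{t-1} + (1-a)^2*x_{t-2} + ...) / (1 + (1-a) + (1-a)^2 + ...),
# which converges to the plain recursion s_t = a*x_t + (1-a)*s_{t-1}; a small worked check:
pd.Series([1.0, 2.0, 3.0]).ewm(alpha=0.3).mean().round(4).tolist()
# [1.0, 1.5882, 2.2329]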
# plot each sensor with respect to the RUL
def plot_signal(df, signal_name, unit=None): # visualization
plt.figure(figsize=(13,5))
if unit:
plt.plot('RUL', signal_name,
data=df[df['Unit']==unit])
else:
for i in df['Unit'].unique():
if (i % 10 == 0): # only plotting every 10th unit
plt.plot('RUL', signal_name,
data=df[df['Unit']==i])
plt.xlim(350, 0) # reverse the x-axis so RUL counts down to zero
plt.xticks(np.arange(0, 375, 25))
plt.ylabel(signal_name)
plt.xlabel('Remaining Useful Life')
#plt.savefig(signal_name+'.jpeg')
plt.show()
# generate 3-D training data sequences (sliding windows) for the LSTM's input
def gen_train_data(df, sequence_length, columns):
data = df[columns].values
num_elements = data.shape[0]
# -1 and +1 because of Python indexing
for start, stop in zip(range(0, num_elements-(sequence_length-1)), range(sequence_length, num_elements+1)):
yield data[start:stop, :]
def gen_data_sequences_train(df, sequence_length, columns, unit_nrs=np.array([])):
if unit_nrs.size <= 0:
unit_nrs = df['Unit'].unique()
data_gen = (list(gen_train_data(df[df['Unit']==unit_nr], sequence_length, columns))
for unit_nr in unit_nrs)
data_array = np.concatenate(list(data_gen)).astype(np.float32)
return data_array
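# Quick sanity check (toy frame, hypothetical column names): each unit with n cycles
# yields n - sequence_length + 1 windows of shape (sequence_length, n_features).
_toy = pd.DataFrame({'Unit': [1]*5 + [2]*5, 's1': np.arange(10, dtype=float), 's2': np.arange(10, dtype=float)})
gen_data_sequences_train(_toy, 3, ['s1', 's2']).shape
# (6, 3, 2): (5 - 3 + 1) = 3 windows per unit, for 2 units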
# generate the training RUL labels, one per sequence, for the LSTM's input
def gen_ruls(df, sequence_length, label):
data_matrix = df[label].values
num_elements = data_matrix.shape[0]
# -1 because I want to predict the rul of that last row in the sequence, not the next row
return data_matrix[sequence_length-1:num_elements, :]
def gen_rul_sequences_train(df, sequence_length, label, unit_nrs=np.array([])):
if unit_nrs.size <= 0:
unit_nrs = df['Unit'].unique()
label_gen = [gen_ruls(df[df['Unit']==unit_nr], sequence_length, label)
for unit_nr in unit_nrs]
label_array = np.concatenate(label_gen).astype(np.float32)
return label_array
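# Continuing the toy frame above: one label per window, taken at the window's last cycle.
_toy['RUL'] = np.arange(10, 0, -1)
gen_rul_sequences_train(_toy, 3, ['RUL']).shape
# (6, 1) — aligned one-to-one with the 6 windows generated above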
# generate the test data sequences (the last window of each unit) for the LSTM's input
def gen_test_data(df, sequence_length, columns, mask_value):
if df.shape[0] < sequence_length:
data_matrix = np.full(shape=(sequence_length, len(columns)), fill_value=mask_value) # pad
idx = data_matrix.shape[0] - df.shape[0]
data_matrix[idx:,:] = df[columns].values # fill with available data
else:
data_matrix = df[columns].values
# yield only the last possible sequence
stop = data_matrix.shape[0]
start = stop - sequence_length
yield data_matrix[start:stop, :]
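# Padding check (toy frame, hypothetical values): a test unit shorter than sequence_length
# is left-padded with mask_value, so only the last rows hold real measurements.
_short = pd.DataFrame({'s1': [0.5, 0.6]})
list(gen_test_data(_short, 4, ['s1'], -99.))[0]
# rows 0-1 are -99.0 padding; rows 2-3 hold the unit's actual values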
def plot_loss(fit_history):
plt.figure(figsize=(13,5))
plt.plot(range(1, len(fit_history.history['loss'])+1), fit_history.history['loss'], label='train')
plt.plot(range(1, len(fit_history.history['val_loss'])+1), fit_history.history['val_loss'], label='validate')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()
# add a placeholder column (filled with 1..n); its values are overwritten in later loops
def new_column(df, column):
#df = df.sort_values(by=column, ascending=False)
df[column] = range(1, len(df) + 1)
return df
#----------------------------Prepare data for searching exponential smoothing's alpha (Grid Search)-------------------#
def prep_data(X_train, X_test, chosen_sensors, alpha, rul):
X_train_interim, X_test_interim = condition_scaler(X_train, X_test, chosen_sensors)
X_train_interim = exponential_smoothing(X_train_interim, chosen_sensors, 0, alpha)
X_test_interim = exponential_smoothing(X_test_interim, chosen_sensors, 0, alpha)
X_train_interim['RUL'].clip(upper=rul, inplace=True)
return X_train_interim, X_test_interim
def rul_piecewise_fct(X_train, rul):
X_train['RUL'].clip(upper=rul, inplace=True)
return X_train
#------------------------------------------LSTM model used ------------------------------#
#def create_lstm_model(input_shape, nodes_per_layer, dropout, learning_rate_):
def create_lstm_model(input_shape, dropout, learning_rate_):
model = Sequential()
#model.add(LSTM(units=nodes_per_layer, activation='tanh',input_shape=(sequence_length, 7)))
#model.add(Dense(units=nodes_per_layer, activation='relu'))
model.add(LSTM(units=128, activation='tanh', input_shape=(sequence_length, len(remaining_sensors_7))))  # sequence_length and remaining_sensors_7 are notebook globals defined below
model.add(Dense(units=128, activation='relu'))
model.add(Dropout(dropout))
model.add(Dense(1, activation='relu'))
model.compile(loss='mse', metrics=['mse'], optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate_))  # 'lr' is deprecated; use 'learning_rate'
return model
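# A hedged usage sketch of create_lstm_model (placeholder hyperparameters, not the tuned ones;
# sequence_length and remaining_sensors_7 are defined further down in the notebook):
'''
model = create_lstm_model(input_shape=(sequence_length, len(remaining_sensors_7)), dropout=0.2, learning_rate_=0.001)
model.summary()
'''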
#----------------------------------Model evaluation metrics -------------------------#
def compute_MAPE(y_true, y_hat):
mape = np.mean(np.abs((y_true - y_hat)/y_true))*100
return mape
def root_mean_squared_error(y_true, y_pred):
return np.sqrt(np.mean(np.square(y_pred - y_true)))
#the score defined in the paper
def compute_s_score(rul_true, rul_pred):
diff = rul_pred - rul_true
return np.sum(np.where(diff < 0, np.exp(-diff/13)-1, np.exp(diff/10)-1))
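# The score is asymmetric: late predictions (d = pred - true > 0, the engine fails earlier
# than predicted) are penalized more than early ones; a small worked check:
compute_s_score(np.array([100.]), np.array([110.]))  # exp(10/10) - 1 ≈ 1.718 (10 cycles late)
compute_s_score(np.array([100.]), np.array([90.]))   # exp(10/13) - 1 ≈ 1.158 (10 cycles early)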
#evaluate the model with R² and RMSE
def evaluate(y_true, y_hat, label='test'):
mse = mean_squared_error(y_true, y_hat)
rmse = np.sqrt(mse)
variance = r2_score(y_true, y_hat)
print('{} set RMSE:{}, R2:{}'.format(label, rmse, variance))
# get data
train, test, y_test = get_data()
train = train.drop(['T20','P20','Nf_dmd','PCNfR_dmd','farB'],axis=1)
test = test.drop(['T20','P20','Nf_dmd','PCNfR_dmd','farB'],axis=1)
train.shape, test.shape, y_test.shape
((61249, 22), (41214, 21), (248, 1))
# the chosen sensors are the 16 that remain after dropping the uninformative ones above
chosen_sensors = ['T24', 'T30', 'T50', 'P15', 'P30','Nf', 'Nc', 'epr', 'Ps30', 'phi', 'NRf', 'NRc', 'BPR', 'htBleed', 'W31','W32']
#adding operational condition
X_train_interm = add_operational_condition(train)
X_test_interm = add_operational_condition(test)
# normalizing based on these operating conditions
X_train_condition_scaled, X_test_condition_scaled = condition_scaler(X_train_interm, X_test_interm, chosen_sensors)
#drop the operational condition variables as they were only helpful for the scaling
data_train = X_train_condition_scaled.drop(['Cycle','Altitude', 'Mach', 'TRA','op_cond'],axis=1)
data_test = X_test_condition_scaled.drop(['Cycle','Altitude', 'Mach', 'TRA','op_cond'],axis=1)
# smooth the signal
train_smooth = exponential_smoothing(data_train,chosen_sensors,0,0.3)
test_smooth = exponential_smoothing(data_test,chosen_sensors,0,0.3)
df_train_unit = train_smooth.Unit
df_test_unit = test_smooth.Unit
df_train_RUL = train_smooth.RUL
train_smooth['RUL'].clip(upper=130, inplace=True)
Xtrain = train_smooth.drop(['RUL','Unit'],axis=1)
Xtest = test_smooth.drop(['Unit'],axis=1)
# load the output file from RStudio after performing ClustOfVar
df7 = pd.read_csv('C:/Users/aalah/3_Feature-clustering-and-XAI-for-RUL-estimation-main/dataset/d7.txt',sep=',',names=['cluster1','cluster2','cluster3','cluster4','cluster5','cluster6','cluster7'
])
df_train_rul = train['RUL']
df_train_unit = train['Unit']
df_test_unit = test['Unit']
#As the file has train and test merged together, we will split them into train and test
cluster7_train = df7[ :61249 ].reset_index()
cluster7_train = cluster7_train.drop('index',axis=1)
cluster7_test = df7[ 61249 : ].reset_index()
cluster7_test = cluster7_test.drop('index',axis=1)
#We add Unit and RUL to these newly created datasets
mid7_train = pd.concat([cluster7_train, df_train_rul], axis=1, join="inner")
train_7 = pd.concat([df_train_unit,mid7_train], axis=1, join="inner")
test_7 = pd.concat([df_test_unit,cluster7_test], axis=1, join="inner")
sequence_length = 30 #TW was found using grid-search
train_7['RUL'].clip(upper=130, inplace=True) #Rectified RUL was found using grid-search
remaining_sensors_7 = ['cluster1','cluster2','cluster3','cluster4','cluster5','cluster6','cluster7']
train_7.drop(['Unit'],axis=1).corr()
| | cluster1 | cluster2 | cluster3 | cluster4 | cluster5 | cluster6 | cluster7 | RUL |
|---|---|---|---|---|---|---|---|---|
| cluster1 | 1.000000 | -0.508032 | -0.278424 | -0.564850 | -0.537588 | -0.370279 | 0.204259 | 0.798214 |
| cluster2 | -0.508032 | 1.000000 | -0.138139 | 0.190258 | 0.091520 | -0.024214 | -0.430536 | -0.129123 |
| cluster3 | -0.278424 | -0.138139 | 1.000000 | 0.532406 | 0.634178 | 0.862535 | 0.850918 | -0.354672 |
| cluster4 | -0.564850 | 0.190258 | 0.532406 | 1.000000 | 0.799138 | 0.596643 | 0.307028 | -0.563758 |
| cluster5 | -0.537588 | 0.091520 | 0.634178 | 0.799138 | 1.000000 | 0.616566 | 0.380294 | -0.594117 |
| cluster6 | -0.370279 | -0.024214 | 0.862535 | 0.596643 | 0.616566 | 1.000000 | 0.706224 | -0.396438 |
| cluster7 | 0.204259 | -0.430536 | 0.850918 | 0.307028 | 0.380294 | 0.706224 | 1.000000 | 0.018556 |
| RUL | 0.798214 | -0.129123 | -0.354672 | -0.563758 | -0.594117 | -0.396438 | 0.018556 | 1.000000 |
# define the mask to set the values in the upper triangle to True
plt.figure(figsize=(16, 6))
mask = np.triu(np.ones_like(train_7.drop(['Unit'],axis=1).corr(), dtype=bool))
heatmap = sns.heatmap(train_7.drop(['Unit'],axis=1).corr(), mask=mask, vmin=-1, vmax=1, annot=True, cmap='BrBG')
heatmap.set_title('[FD004: 7 clusters (Train)] Correlation Heatmap', fontdict={'fontsize':18}, pad=16)
Text(0.5, 1.0, '[FD004: 7 clusters (Train)] Correlation Heatmap')
g = test_7.groupby('Unit')
dataframe = pd.concat([(pd.concat([g.tail(1)]).sort_values('Unit').reset_index(drop=True)),y_test], axis=1, join="inner")
# define the mask to set the values in the upper triangle to True
plt.figure(figsize=(16, 6))
mask = np.triu(np.ones_like(dataframe.drop(['Unit'],axis=1).corr(), dtype=bool))
heatmap = sns.heatmap(dataframe.drop(['Unit'],axis=1).corr(), mask=mask, vmin=-1, vmax=1, annot=True, cmap='BrBG')
heatmap.set_title('[FD004: 7 clusters (Test)] Correlation Heatmap', fontdict={'fontsize':18}, pad=16)
Text(0.5, 1.0, '[FD004: 7 clusters (Test)] Correlation Heatmap')
# load our model with 1 LSTM layer
loaded_model_lstm_7 = tf.keras.models.load_model('C:/Users/aalah/Downloads/model_FD004_clusters_130_best')
loaded_model_lstm_7.summary()
Model: "sequential_2"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
lstm_2 (LSTM) (None, 128) 69632
dense_4 (Dense) (None, 128) 16512
dropout_2 (Dropout) (None, 128) 0
dense_5 (Dense) (None, 1) 129
=================================================================
Total params: 86,273
Trainable params: 86,273
Non-trainable params: 0
_________________________________________________________________
# create sequences train, test
train_array_7 = gen_data_sequences_train(train_7, sequence_length,remaining_sensors_7)
label_array_7 = gen_rul_sequences_train(train_7, sequence_length, ['RUL'])
test_gen_7 = (list(gen_test_data(test_7[test_7['Unit']==unit_nr], sequence_length,remaining_sensors_7, -99.))
for unit_nr in test_7['Unit'].unique())
test_array_7 = np.concatenate(list(test_gen_7)).astype(np.float32)
y_true_clipped = y_test.copy()
y_true_clipped.clip(upper=130,inplace=True)
# predict and evaluate
print('(FD004) For 7 clusters (Unclipped true RUL): ')
y_hat_train = loaded_model_lstm_7.predict(train_array_7)
evaluate(label_array_7, y_hat_train, 'train')
y_hat_test = loaded_model_lstm_7.predict(test_array_7)
evaluate(y_test, y_hat_test)
print(' ')
print('(FD004) For 7 clusters (Clipped true RUL): ')
y_hat_test = loaded_model_lstm_7.predict(test_array_7)
evaluate(y_true_clipped, y_hat_test)
(FD004) For 7 clusters (Unclipped true RUL): 
train set RMSE:15.433294296264648, R2:0.8730758709070203
test set RMSE:25.451214849685496, R2:0.7821061812959538
 
(FD004) For 7 clusters (Clipped true RUL): 
test set RMSE:16.140314014674942, R2:0.8678543634792815
print("(FD004-Test) S-score For 7 clusters (Unclipped true RUL) : ",compute_s_score(y_test, y_hat_test))
print(' ')
print("(FD004-Test) S-score For 7 clusters (Clipped true RUL) : ",compute_s_score(y_true_clipped, y_hat_test))
print(' ')
print("(FD004-Test) MAPE For 7 clusters (Unclipped true RUL): ",compute_MAPE(y_test, y_hat_test))
print(' ')
print("(FD004-Test) MAPE For 7 clusters (Clipped true RUL) : ",compute_MAPE(y_true_clipped, y_hat_test))
(FD004-Test) S-score For 7 clusters (Unclipped true RUL) :  4144.575388129244
 
(FD004-Test) S-score For 7 clusters (Clipped true RUL) :  1299.1969731549616
 
(FD004-Test) MAPE For 7 clusters (Unclipped true RUL):  RUL    27.451503
dtype: float64
 
(FD004-Test) MAPE For 7 clusters (Clipped true RUL) :  RUL    23.487267
dtype: float64
#for test dataset
plt.figure(figsize=(15,5))
plt.plot(y_hat_test, color='red', label='Predicted RUL',marker='x',linestyle ='--')
plt.plot(y_test, color='gray', label='True RUL',marker='x',linestyle ='--')
plt.ylabel("RUL")
plt.xlabel("Cycle")
plt.legend(loc='upper left')
plt.title('[FD004 : 7 clusters] Predicted RUL and true RUL')
plt.show()
#for test dataset
plt.figure(figsize=(15,5))
plt.plot(y_hat_test, color='red', label='Predicted RUL',marker='x',linestyle ='--')
plt.plot(y_true_clipped, color='gray', label='Rectified RUL',marker='x',linestyle ='--')
plt.ylabel("RUL")
plt.xlabel("Cycle")
plt.legend(loc='upper left')
plt.title('[FD004 : 7 clusters] Predicted RUL and rectified true RUL')
plt.show()
#test
flat_y_hat_7 = np.ravel(y_hat_test)
flat_y_true = np.ravel(y_test)
flat_y_true_clipped = np.ravel(y_true_clipped)
DF_test = pd.DataFrame({'True_Rul': flat_y_true,'Rectified_true_RUL': flat_y_true_clipped, 'Pred_RUL': flat_y_hat_7})
ax = DF_test[['True_Rul','Rectified_true_RUL', 'Pred_RUL']].plot(kind='box', title='[FD004 : 7 clusters] Boxplot of Predicted RUL, true RUL and rectified RUL', showmeans=True)
plt.show()
DF_test = new_column(DF_test,'s_score_7_clusters')
DF_test = new_column(DF_test,'s_score_7_clusters_clip')
DF_test = new_column(DF_test,'d_7_clusters_clip')
DF_test = new_column(DF_test,'d_7_clusters')
DF_test = new_column(DF_test,'RMSE_7_clusters')
DF_test = new_column(DF_test,'RMSE_7_clusters_clip')
DF_test = new_column(DF_test,'Cor_7clust')
DF_test = new_column(DF_test,'Cor_7clust_clip')
for i in range(len(DF_test.True_Rul)):
DF_test.loc[i, 's_score_7_clusters'] = compute_s_score(DF_test.True_Rul[i], DF_test.Pred_RUL[i])
DF_test.loc[i, 'd_7_clusters'] = DF_test.Pred_RUL[i] - DF_test.True_Rul[i]
DF_test.loc[i, 'RMSE_7_clusters'] = root_mean_squared_error(DF_test.True_Rul[i], DF_test.Pred_RUL[i])
DF_test.loc[i, 's_score_7_clusters_clip'] = compute_s_score(DF_test.Rectified_true_RUL[i], DF_test.Pred_RUL[i])
DF_test.loc[i, 'd_7_clusters_clip'] = DF_test.Pred_RUL[i] - DF_test.Rectified_true_RUL[i]
DF_test.loc[i, 'RMSE_7_clusters_clip'] = root_mean_squared_error(DF_test.Rectified_true_RUL[i], DF_test.Pred_RUL[i])
for i in range(len(DF_test.True_Rul)):
if (DF_test.d_7_clusters[i] < -13):
DF_test.loc[i, 'Cor_7clust'] = 'FP'
if (DF_test.d_7_clusters[i] > 10):
DF_test.loc[i, 'Cor_7clust'] = 'FN'
if (DF_test.d_7_clusters[i] >= -13 and DF_test.d_7_clusters[i] <= 10):
DF_test.loc[i, 'Cor_7clust'] = '1'
for i in range(len(DF_test.Rectified_true_RUL)):
if (DF_test.d_7_clusters_clip[i] < -13):
DF_test.loc[i, 'Cor_7clust_clip'] = 'FP'
if (DF_test.d_7_clusters_clip[i] > 10):
DF_test.loc[i, 'Cor_7clust_clip'] = 'FN'
if (DF_test.d_7_clusters_clip[i] >= -13 and DF_test.d_7_clusters_clip[i] <= 10):
DF_test.loc[i, 'Cor_7clust_clip'] = '1'
DF_test.describe()
| | True_Rul | Rectified_true_RUL | Pred_RUL | s_score_7_clusters | s_score_7_clusters_clip | d_7_clusters_clip | d_7_clusters | RMSE_7_clusters | RMSE_7_clusters_clip |
|---|---|---|---|---|---|---|---|---|---|
| count | 248.000000 | 248.000000 | 248.000000 | 248.000000 | 248.000000 | 248.000000 | 248.000000 | 248.000000 | 248.000000 |
| mean | 86.552419 | 79.125000 | 82.191444 | 16.711998 | 5.238697 | 3.066446 | -4.360974 | 19.032939 | 11.896870 |
| std | 54.634054 | 44.490054 | 42.728584 | 65.147933 | 12.233847 | 15.878390 | 25.125520 | 16.931257 | 10.929588 |
| min | 6.000000 | 6.000000 | 3.019173 | 0.001061 | 0.001061 | -61.393436 | -89.137596 | 0.010605 | 0.010605 |
| 25% | 36.000000 | 36.000000 | 39.040292 | 0.605859 | 0.305527 | -4.650210 | -18.397741 | 5.237692 | 3.191210 |
| 50% | 88.000000 | 88.000000 | 97.254143 | 2.687879 | 1.123596 | 1.516660 | 0.860460 | 15.076660 | 8.123750 |
| 75% | 126.750000 | 126.750000 | 121.706453 | 11.555973 | 4.066649 | 11.183425 | 11.183425 | 29.598778 | 17.814741 |
| max | 195.000000 | 130.000000 | 138.929611 | 949.262407 | 111.457124 | 43.813622 | 43.813622 | 89.137596 | 61.393436 |
DF_test.groupby(['Cor_7clust']).size()
Cor_7clust
1     107
FN     67
FP     74
dtype: int64
DF_test.groupby(['Cor_7clust_clip']).size()
Cor_7clust_clip
1     149
FN     67
FP     32
dtype: int64
x = [0,50,100,150,200,250]
y = [-13,-13,-13,-13,-13,-13]
y2 = [10,10,10,10,10,10]
plt.plot(x, y,color='green')
plt.plot(x, y2,color='indigo')
plt.scatter(DF_test.index, DF_test.d_7_clusters_clip,marker="x")
for i in range(len(DF_test.Rectified_true_RUL)):
if(DF_test.d_7_clusters_clip[i]>11):
plt.plot(DF_test.index[i],DF_test.d_7_clusters_clip[i], color='coral', marker='x')
if(DF_test.d_7_clusters_clip[i]<-14):
plt.plot(DF_test.index[i],DF_test.d_7_clusters_clip[i], color='orange', marker='x')
if (DF_test.d_7_clusters_clip[i]>=-14 and DF_test.d_7_clusters_clip[i]<=11):
plt.plot(DF_test.index[i],DF_test.d_7_clusters_clip[i], color='grey', marker='x')
plt.xlabel('Unit ID')
plt.ylabel('$h_i$ = RUL\'- RUL')
plt.title('[FD004: 7 clusters] $h_i$ for Predicted RUL and Rectified True RUL')
Text(0.5, 1.0, '[FD004: 7 clusters] $h_i$ for Predicted RUL and Rectified True RUL')
dataframe_test = DF_test.sort_values('True_Rul',ascending=True).reset_index()
dataframe_test.columns
Index(['index', 'True_Rul', 'Rectified_true_RUL', 'Pred_RUL',
's_score_7_clusters', 's_score_7_clusters_clip', 'd_7_clusters_clip',
'd_7_clusters', 'RMSE_7_clusters', 'RMSE_7_clusters_clip', 'Cor_7clust',
'Cor_7clust_clip'],
dtype='object')
FN = []
FP = []
FN_index = []
FP_index = []
for i in range(len(dataframe_test.Rectified_true_RUL)):
if(dataframe_test.d_7_clusters_clip[i]>9 and dataframe_test.d_7_clusters_clip[i]<=10):
FN.append(dataframe_test.Pred_RUL[i])
FN_index.append(dataframe_test.index[i])
if (dataframe_test.d_7_clusters_clip[i]>-14 and dataframe_test.d_7_clusters_clip[i]<=-12):
FP.append(dataframe_test.Pred_RUL[i])
FP_index.append(dataframe_test.index[i])
x = [0,50,100,150,200,250]
y = [-13,-13,-13,-13,-13,-13]
y2 = [10,10,10,10,10,10]
plt.plot(x, y,color='green')
plt.plot(x, y2,color='indigo')
plt.scatter(DF_test.index, DF_test.d_7_clusters,marker="x")
for i in range(len(DF_test.True_Rul)):
if(DF_test.d_7_clusters[i]>11):
plt.plot(DF_test.index[i],DF_test.d_7_clusters[i], color='red', marker='x')
if(DF_test.d_7_clusters[i]<-14):
plt.plot(DF_test.index[i],DF_test.d_7_clusters[i], color='orange', marker='x')
if (DF_test.d_7_clusters[i]>=-14 and DF_test.d_7_clusters[i]<=11):
plt.plot(DF_test.index[i],DF_test.d_7_clusters[i], color='grey', marker='x')
plt.xlabel('Unit ID')
plt.ylabel('$h_i$ = RUL\'- RUL')
plt.title('[FD004: 7 clusters] $h_i$ for Predicted RUL and True RUL')
Text(0.5, 1.0, '[FD004: 7 clusters] $h_i$ for Predicted RUL and True RUL')
sns.distplot(DF_test.RMSE_7_clusters)
<AxesSubplot:xlabel='RMSE_7_clusters', ylabel='Density'>
DF_test.RMSE_7_clusters.plot(kind='box', title='[FD004: 7 clusters] Boxplot of RMSE rectified true RUL', showmeans=True)
<AxesSubplot:title={'center':'[FD004: 7 clusters] Boxplot of RMSE rectified true RUL'}>
for unit in train_7['Unit'].unique():
#engine16 = df_PCA_train_16[df_PCA_train_16['Unit']==unit]
engine1 = train_7[train_7['Unit']==unit]
engine_array1 = gen_data_sequences_train(engine1, sequence_length,remaining_sensors_7)
engine_label_array1 = gen_rul_sequences_train(engine1, sequence_length, ['RUL'])
y_hat_engine1 = loaded_model_lstm_7.predict(engine_array1)
error = y_hat_engine1 - engine_label_array1
# plot true vs. predicted RUL for the current engine of the train dataset
plt.figure(figsize=(10,5))
x = list(np.arange(0,len(engine1)+1,50))
y = [0] * len(x)
y1 = [-13] * len(x)
y2 = [10] * len(x)
plt.plot(x, y2 , color='indigo', label='FN limit')
plt.plot(x, y1 , color='green', label='FP limit')
#plt.plot(y_hat_engine16, color='red', label='Predicted RUL (16 PC)')
plt.plot(engine_label_array1, color='blue', label='True RUL')
plt.plot(y_hat_engine1, color='red', label='Predicted RUL')
plt.plot(error, color='gray',linestyle ='--', label='$d_i$ = RUL\'-RUL')
plt.ylabel("RUL")
plt.xlabel("Cycle")
plt.legend(loc='upper right')
plt.title('[FD004 : 7 clusters] Engine %d' %unit)
#plt.savefig('C:/Users/aalah/Downloads/Final_results/7_Clust_error/Unit_%d.jpeg'%unit)
#plt.show()
!pip install shap
Requirement already satisfied: shap in c:\users\gyouness\anaconda3\lib\site-packages (0.41.0) [pip dependency output truncated]
import shap
import tensorflow.compat.v1.keras.backend as K
import tensorflow as tf
tf.compat.v1.disable_v2_behavior()
print("SHAP version is:", shap.__version__)
print("Tensorflow version is:", tf.__version__)
# print the JS visualization code to the notebook
shap.initjs()
explainer = shap.DeepExplainer(loaded_model_lstm_7,train_array_7)
shap_values = explainer.shap_values(test_array_7)
SHAP version is: 0.41.0
Tensorflow version is: 2.6.0
#! pip3 install tensorflow-gpu --user
Collecting tensorflow-gpu
[pip dependency output truncated]
Successfully installed tensorflow-gpu-2.10.1
with open("explainer.sav", "wb") as f:
explainer.save(f)
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-105-0869bc97ebfd> in <module>
      1 with open("explainer.sav", "wb") as f:
----> 2     explainer.save(f)
~\Anaconda3\lib\site-packages\shap\explainers\_explainer.py in save(self, out_file, model_saver, masker_saver)
    417         super().save(out_file)
    418         with Serializer(out_file, "shap.Explainer", version=0) as s:
--> 419             s.save("model", self.model, model_saver)
    420             s.save("masker", self.masker, masker_saver)
    421             s.save("link", self.link)
AttributeError: 'Deep' object has no attribute 'model'
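# This shap version's DeepExplainer does not support Explainer.save; a common workaround
# (an assumption here, not the author's method) is to pickle the computed SHAP arrays instead:
import pickle
with open('shap_values.pkl', 'wb') as f:
pickle.dump(shap_values, f)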
shap_values = explainer.shap_values(test_array_7)
with open('shap_values.txt', 'w') as f:
for value in shap_values:
f.write(f"{value}\n")
shap.initjs()
shap.force_plot(explainer.expected_value[0], shap_values[0][0], remaining_sensors_7)
print("Base Value : ", explainer.expected_value)
Base Value : [101.3882]
explainer.expected_value
len(shap_values)
1
test_array_7.shape
shap_values[0][0].shape
(30, 7)
# SHAP additivity check for observation i: prediction ≈ base value + sum of its SHAP values
i = 8
pred_i = loaded_model_lstm_7.predict(test_array_7[i:i+1])
sum_shap_i = shap_values[0][i].sum() + explainer.expected_value[0]
pred_i, sum_shap_i
# Plot SHAP for ONLY one observation i
i = 10
shap.initjs()
test_array_7_df = pd.DataFrame(data=test_array_7[i], columns = remaining_sensors_7)
shap.force_plot(explainer.expected_value[0], shap_values[0][i], test_array_7_df)
## Problem: the force plot cannot take many observations into account at the same time.
### The picture below explains only one observation of 30 time steps, where each time step has 7 clusters.
################# Plot AVERAGE shap values for ALL observations #####################
## Consider ABSOLUTE of SHAP values ##
shap_average_abs_value = np.abs(shap_values[0]).mean(axis=0)
x_average_value = pd.DataFrame(data=test_array_7.mean(axis=0), columns = remaining_sensors_7)
shap.force_plot(0, shap_average_abs_value, x_average_value)
# plot a single time step (index i) of the average SHAP values
i = 0
shap.force_plot(0, shap_average_abs_value[i], x_average_value.iloc[i,:])
################# Plot AVERAGE shap values for ALL observations #####################
## Consider average (+ is different from -)
shap_average_value = shap_values[0].mean(axis=0)
x_average_value = pd.DataFrame(data=test_array_7.mean(axis=0), columns = remaining_sensors_7)
shap.force_plot(explainer.expected_value[0], shap_average_value, x_average_value)
print(remaining_sensors_7)
print(len(remaining_sensors_7))
['cluster1', 'cluster2', 'cluster3', 'cluster4', 'cluster5', 'cluster6', 'cluster7']
7
i=0
j=0
test_array_7_df = pd.DataFrame(data=test_array_7[i][j].reshape(1,7), columns = remaining_sensors_7)
shap.force_plot(explainer.expected_value[0], shap_values[0][i][j], test_array_7_df)
shap_values_2D = shap_values[0].reshape(-1,7)
X_test_2D = test_array_7.reshape(-1,7)
shap_values_2D.shape,X_test_2D.shape
## 248x30=7440
((7440, 7), (7440, 7))
x_test_2d = pd.DataFrame(data=X_test_2D, columns = remaining_sensors_7)
x_test_2d.corr()
| | cluster1 | cluster2 | cluster3 | cluster4 | cluster5 | cluster6 | cluster7 |
|---|---|---|---|---|---|---|---|
| cluster1 | 1.000000 | 0.964570 | 0.967999 | 0.959317 | 0.962277 | 0.971246 | 0.976183 |
| cluster2 | 0.964570 | 1.000000 | 0.978986 | 0.984760 | 0.983130 | 0.986916 | 0.966184 |
| cluster3 | 0.967999 | 0.978986 | 1.000000 | 0.989598 | 0.992867 | 0.996964 | 0.995590 |
| cluster4 | 0.959317 | 0.984760 | 0.989598 | 1.000000 | 0.995196 | 0.993043 | 0.981600 |
| cluster5 | 0.962277 | 0.983130 | 0.992867 | 0.995196 | 1.000000 | 0.993847 | 0.985302 |
| cluster6 | 0.971246 | 0.986916 | 0.996964 | 0.993043 | 0.993847 | 1.000000 | 0.991172 |
| cluster7 | 0.976183 | 0.966184 | 0.995590 | 0.981600 | 0.985302 | 0.991172 | 1.000000 |
shap.summary_plot(shap_values_2D, x_test_2d)
shap.summary_plot(shap_values_2D, x_test_2d, plot_type="bar")
# the chosen sensors are the 16 that remain after dropping the uninformative ones above
chosen_sensors = ['T24', 'T30', 'T50', 'P15', 'P30','Nf', 'Nc', 'epr', 'Ps30', 'phi', 'NRf', 'NRc', 'BPR', 'htBleed', 'W31','W32']
#adding operational condition
X_train_interm = add_operational_condition(train)
X_test_interm = add_operational_condition(test)
# normalizing based on these operating conditions
X_train_condition_scaled, X_test_condition_scaled = condition_scaler(X_train_interm, X_test_interm, chosen_sensors)
#drop the operational condition variables as they were only helpful for the scaling
data_train = X_train_condition_scaled.drop(['Cycle','Altitude', 'Mach', 'TRA','op_cond'],axis=1)
data_test = X_test_condition_scaled.drop(['Cycle','Altitude', 'Mach', 'TRA','op_cond'],axis=1)
#perform exponential smoothing on the data
train_smooth = exponential_smoothing(data_train,chosen_sensors,0,0.3) #here alpha=0.3, was found when performing GridSearch
test_smooth = exponential_smoothing(data_test,chosen_sensors,0,0.3)
# rectify (clip) the training RUL
train_smooth['RUL'].clip(upper=130, inplace=True)
Xtrain = train_smooth.drop(['RUL','Unit'],axis=1)
Xtest = test_smooth.drop(['Unit'],axis=1)
#y_test['RUL'].clip(upper=150, inplace=True)
pca_16 = PCA(n_components=16)
pca_16.fit(Xtrain)
pca_4 = PCA(n_components=4)
pca_4.fit(Xtrain)
pca_3 = PCA(n_components=3)
pca_3.fit(Xtrain)
PCA(n_components=3)
columns_16 = ['pca_%i' % i for i in range(16)]
columns_4 = ['pca_%i' % i for i in range(4)]
columns_3 = ['pca_%i' % i for i in range(3)]
df_pca_16 = pd.DataFrame(pca_16.transform(Xtrain), columns=columns_16, index=Xtrain.index)
df_pca_4 = pd.DataFrame(pca_4.transform(Xtrain), columns=columns_4, index=Xtrain.index)
df_pca_3 = pd.DataFrame(pca_3.transform(Xtrain), columns=columns_3, index=Xtrain.index)
#df_pca.head()
train_pca_16 = pca_16.transform(Xtrain)
test_pca_16 = pca_16.transform(Xtest)
train_pca_4 = pca_4.transform(Xtrain)
test_pca_4 = pca_4.transform(Xtest)
train_pca_3 = pca_3.transform(Xtrain)
test_pca_3 = pca_3.transform(Xtest)
columns_16=['PC1','PC2','PC3','PC4','PC5','PC6','PC7','PC8','PC9','PC10','PC11','PC12','PC13','PC14','PC15','PC16']
columns_4=['PC1','PC2','PC3','PC4']
columns_3=['PC1','PC2','PC3']
PCA_test_16 = pd.DataFrame(test_pca_16,columns=columns_16)
df_PCA_test_16 = pd.concat([df_test_unit,PCA_test_16], axis=1, join="inner")
PCA_test_4 = pd.DataFrame(test_pca_4,columns=columns_4)
df_PCA_test_4 = pd.concat([df_test_unit,PCA_test_4], axis=1, join="inner")
PCA_test_3 = pd.DataFrame(test_pca_3,columns=columns_3)
df_PCA_test_3 = pd.concat([df_test_unit,PCA_test_3], axis=1, join="inner")
# 16 components
PCA_train_16 = pd.DataFrame(train_pca_16,columns=columns_16)
mid_PCA_train_16 = pd.concat([df_train_unit,PCA_train_16], axis=1, join="inner")
df_PCA_train_16 = pd.concat([mid_PCA_train_16 , df_train_RUL], axis=1, join="inner")
# 4 components
PCA_train_4 = pd.DataFrame(train_pca_4,columns=columns_4)
mid_PCA_train_4 = pd.concat([df_train_unit,PCA_train_4], axis=1, join="inner")
df_PCA_train_4 = pd.concat([mid_PCA_train_4 , df_train_RUL], axis=1, join="inner")
# 3 components
PCA_train_3 = pd.DataFrame(train_pca_3,columns=columns_3)
mid_PCA_train_3 = pd.concat([df_train_unit,PCA_train_3], axis=1, join="inner")
df_PCA_train_3 = pd.concat([mid_PCA_train_3 , df_train_RUL], axis=1, join="inner")
model_3 = tf.keras.models.load_model('C:/Users/aalah/3_Feature-clustering-and-XAI-for-RUL-estimation-main/model_3PC')
model_4 = tf.keras.models.load_model('C:/Users/aalah/3_Feature-clustering-and-XAI-for-RUL-estimation-main/model_4PC')
model_16 = tf.keras.models.load_model('C:/Users/aalah/3_Feature-clustering-and-XAI-for-RUL-estimation-main/my_model_16_PC')
sequence_length = 30
# create sequences train, test
train_array_16 = gen_data_sequences_train(df_PCA_train_16, sequence_length,columns_16)
label_array_16 = gen_rul_sequences_train(df_PCA_train_16, sequence_length, ['RUL'])
test_gen_16 = (list(gen_test_data(df_PCA_test_16[df_PCA_test_16['Unit']==unit_nr], sequence_length,columns_16, -99.))
for unit_nr in df_PCA_test_16['Unit'].unique())
test_array_16 = np.concatenate(list(test_gen_16)).astype(np.float32)
# create sequences train, test
train_array_4 = gen_data_sequences_train(df_PCA_train_4, sequence_length,columns_4)
label_array_4 = gen_rul_sequences_train(df_PCA_train_4, sequence_length, ['RUL'])
test_gen_4 = (list(gen_test_data(df_PCA_test_4[df_PCA_test_4['Unit']==unit_nr], sequence_length,columns_4, -99.))
for unit_nr in df_PCA_test_4['Unit'].unique())
test_array_4 = np.concatenate(list(test_gen_4)).astype(np.float32)
# create sequences train, test
train_array_3 = gen_data_sequences_train(df_PCA_train_3, sequence_length,columns_3)
label_array_3 = gen_rul_sequences_train(df_PCA_train_3, sequence_length, ['RUL'])
test_gen_3 = (list(gen_test_data(df_PCA_test_3[df_PCA_test_3['Unit']==unit_nr], sequence_length,columns_3, -99.))
for unit_nr in df_PCA_test_3['Unit'].unique())
test_array_3 = np.concatenate(list(test_gen_3)).astype(np.float32)
'''
n_epochs = 20
batch_size = 192
model_3 = Sequential()
model_3.add(LSTM(units=512, activation='tanh',input_shape=(sequence_length, len(columns_3))))
model_3.add(Dense(units=512, activation='relu'))
model_3.add(Dropout(0.2))
model_3.add(Dense(1,activation='relu'))
model_3.compile(loss='mse',metrics=['mse'], optimizer=tf.keras.optimizers.Adam(lr=0.01))
'''
# predict and evaluate 3 PC
print("Model evaluationg for FD004 using 3 PC [Unclipped True RUL]")
y_hat_train_3 = model_3.predict(train_array_3)
evaluate(label_array_3, y_hat_train_3, 'train')
y_hat_test_3 = model_3.predict(test_array_3)
evaluate(y_test, y_hat_test_3)
print(' ')
print("Model evaluationg for FD004 using 3 PC [Clipped True RUL]")
y_hat_test_3 = model_3.predict(test_array_3)
evaluate(y_test['RUL'].clip(upper=130), y_hat_test_3)
print(' ')
print("S-score for test (using 3 PC) [Unclipped True RUL] : ",compute_s_score(y_test, y_hat_test_3))
print(' ')
print("S-score for test (using 3 PC) [Clipped True RUL] : ",compute_s_score(y_true_clipped, y_hat_test_3))
print(' ')
print('MAPE For Test (using 3 PC) [Unclipped True RUL]: ', compute_MAPE(y_test, y_hat_test_3))
print(' ')
print('MAPE For Test (using 3 PC) [Clipped True RUL] : ', compute_MAPE(y_true_clipped, y_hat_test_3))
Model evaluation for FD004 using 3 PC [Unclipped True RUL]
train set RMSE:17.382606506347656, R2:0.838988563630126
test set RMSE:29.217255497050335, R2:0.7128513937502876
 
Model evaluation for FD004 using 3 PC [Clipped True RUL]
test set RMSE:20.638153353070795, R2:0.7839419072877329
 
S-score for test (using 3 PC) [Unclipped True RUL] :  8711.476815379225
 
S-score for test (using 3 PC) [Clipped True RUL] :  2924.6954488062893
 
MAPE For Test (using 3 PC) [Unclipped True RUL]:  RUL    29.674688
dtype: float64
 
MAPE For Test (using 3 PC) [Clipped True RUL] :  RUL    25.963601
dtype: float64
'''
n_epochs = 10
batch_size = 288
model_16 = Sequential()
model_16.add(LSTM(units=192, activation='tanh',input_shape=(sequence_length, len(columns_16))))
model_16.add(Dense(units=192, activation='relu'))
model_16.add(Dropout(0.1))
model_16.add(Dense(1,activation='relu'))
model_16.compile(loss='mse',metrics=['mse'], optimizer=tf.keras.optimizers.Adam(lr=0.02))
'''
# predict and evaluate 16 PC
print("Model evaluation for FD004 using 16 PC [Unclipped True RUL] ")
y_hat_train_16 = model_16.predict(train_array_16)
evaluate(label_array_16, y_hat_train_16, 'train')
y_hat_test_16 = model_16.predict(test_array_16)
evaluate(y_test, y_hat_test_16)
print(' ')
print("Model evaluation for FD004 using 16 PC [Clipped True RUL]")
y_hat_test_16 = model_16.predict(test_array_16)
evaluate(y_test['RUL'].clip(upper=130), y_hat_test_16)
print(' ')
print("S-score for test (using 16 PC) [Unclipped True RUL]: ",compute_s_score(y_test, y_hat_test_16))
print(' ')
print("S-score for test (using 16 PC) [Clipped True RUL]: ",compute_s_score(y_true_clipped, y_hat_test_16))
print(' ')
print('MAPE For Test (using 16 PC) [Unclipped True RUL]: ', compute_MAPE(y_test, y_hat_test_16))
print(' ')
print('MAPE For Test (using 16 PC) [Clipped True RUL] : ', compute_MAPE(y_true_clipped, y_hat_test_16))
print(' ')
Model evaluation for FD004 using 16 PC [Unclipped True RUL] 
train set RMSE:18.770889282226562, R2:0.8122428046231166
test set RMSE:28.285323915172803, R2:0.7308773881575619
 
Model evaluation for FD004 using 16 PC [Clipped True RUL]
test set RMSE:20.102325810067512, R2:0.7950152829610603
 
S-score for test (using 16 PC) [Unclipped True RUL]:  8125.983467991493
 
S-score for test (using 16 PC) [Clipped True RUL]:  2949.822329007005
 
MAPE For Test (using 16 PC) [Unclipped True RUL]:  RUL    31.389666
dtype: float64
 
MAPE For Test (using 16 PC) [Clipped True RUL] :  RUL    28.387417
dtype: float64
'''
n_epochs = 10
batch_size = 32
model_4 = Sequential()
model_4.add(LSTM(units=224, activation='tanh',input_shape=(sequence_length, len(columns_4))))
model_4.add(Dense(units=224, activation='relu'))
model_4.add(Dropout(0.3))
model_4.add(Dense(1,activation='relu'))
model_4.compile(loss='mse',metrics=['mse'], optimizer=tf.keras.optimizers.Adam(lr=0.0003))
'''
# predict and evaluate 4 PC
print(' ')
print("Model evaluationg for FD004 using 4 PC [Unclipped True RUL]")
y_hat_train_4 = model_4.predict(train_array_4)
evaluate(label_array_4, y_hat_train_4, 'train')
y_hat_test_4 = model_4.predict(test_array_4)
evaluate(y_test, y_hat_test_4)
print(' ')
print("Model evaluationg for FD004 using 4 PC [Clipped True RUL]")
y_hat_test_4 = model_4.predict(test_array_4)
evaluate(y_test['RUL'].clip(upper=130), y_hat_test_4)
print(' ')
print("S-score for test (using 4 PC) [Unclipped True RUL]: ",compute_s_score(y_test, y_hat_test_4))
print(' ')
print("S-score for test (using 4 PC) [Clipped True RUL]: ",compute_s_score(y_true_clipped, y_hat_test_4))
print(' ')
print('MAPE For Test (using 4 PC) [Unclipped True RUL] : ', compute_MAPE(y_test, y_hat_test_4))
print(' ')
print('MAPE For Test (using 4 PC) [Clipped True RUL] : ', compute_MAPE(y_true_clipped, y_hat_test_4))
print(' ')
Model evaluation for FD004 using 4 PC [Unclipped True RUL]
train set RMSE:19.235004425048828, R2:0.8028433067882396
test set RMSE:29.27775201418877, R2:0.7116610372455447
 
Model evaluation for FD004 using 4 PC [Clipped True RUL]
test set RMSE:19.84659562897858, R2:0.8001975037510192
 
S-score for test (using 4 PC) [Unclipped True RUL]:  7203.53379466783
 
S-score for test (using 4 PC) [Clipped True RUL]:  2524.4032826829143
 
MAPE For Test (using 4 PC) [Unclipped True RUL] :  RUL    38.224946
dtype: float64
 
MAPE For Test (using 4 PC) [Clipped True RUL] :  RUL    34.431245
dtype: float64
train_array_ab = gen_data_sequences_train(train_smooth, sequence_length,chosen_sensors)
label_array_ab = gen_rul_sequences_train(train_smooth, sequence_length, ['RUL'])
test_gen_ab = (list(gen_test_data(test_smooth[test_smooth['Unit']==unit_nr], sequence_length,chosen_sensors, -99.))
for unit_nr in test_smooth['Unit'].unique())
test_array_ab = np.concatenate(list(test_gen_ab)).astype(np.float32)
y_true_clipped = y_test.copy()
y_true_clipped['RUL'].clip(upper=130,inplace=True)
# model with 1 LSTM layer
model_1LSTM = tf.keras.models.load_model('C:/Users/aalah/3_Feature-clustering-and-XAI-for-RUL-estimation-main/model_FD004_chosen_sensors')
model_1LSTM.summary()
Model: "sequential_14"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
lstm_13 (LSTM) (None, 128) 74240
dense_26 (Dense) (None, 128) 16512
dropout_13 (Dropout) (None, 128) 0
dense_27 (Dense) (None, 1) 129
=================================================================
Total params: 90,881
Trainable params: 90,881
Non-trainable params: 0
_________________________________________________________________
# predict and evaluate
print('Results using model with 1 LSTM layer without feature clustering')
y_hat_test_1 = model_1LSTM.predict(test_array_ab)
print(' ')
print('(FD004) For sensors (Unclipped true RUL): ')
evaluate(y_test, y_hat_test_1)
print(' ')
print('(FD004) For sensors (Clipped true RUL): ')
evaluate(y_true_clipped, y_hat_test_1)
print(' ')
print("(FD004-Test) S-score For sensors (Unclipped true RUL) : ",compute_s_score(y_test, y_hat_test_1))
print(' ')
print("(FD004-Test) MAPE For sensors (Unclipped true RUL) : ",compute_MAPE(y_test, y_hat_test_1))
print(' ')
print("(FD004-Test) S-score For sensors (Clipped true RUL) : ",compute_s_score(y_true_clipped, y_hat_test_1))
print(' ')
print("(FD004-Test) MAPE For sensors (Clipped true RUL) : ",compute_MAPE(y_true_clipped, y_hat_test_1))
Results using model with 1 LSTM layer without feature clustering
 
(FD004) For sensors (Unclipped true RUL): 
test set RMSE:28.4093278249413, R2:0.728512528643812
 
(FD004) For sensors (Clipped true RUL): 
test set RMSE:22.562970115455073, R2:0.7417612540994336
 
(FD004-Test) S-score For sensors (Unclipped true RUL) :  7260.171736277944
 
(FD004-Test) MAPE For sensors (Unclipped true RUL) :  RUL    33.803269
dtype: float64
 
(FD004-Test) S-score For sensors (Clipped true RUL) :  5127.797315344584
 
(FD004-Test) MAPE For sensors (Clipped true RUL) :  RUL    31.221838
dtype: float64
train_array_ab = gen_data_sequences_train(train_smooth, sequence_length,chosen_sensors)
label_array_ab = gen_rul_sequences_train(train_smooth, sequence_length, ['RUL'])
test_gen_ab = (list(gen_test_data(test_smooth[test_smooth['Unit']==unit_nr], sequence_length,chosen_sensors, -99.))
for unit_nr in test_smooth['Unit'].unique())
test_array_ab = np.concatenate(list(test_gen_ab)).astype(np.float32)
y_true_clipped = y_test.copy()
y_true_clipped['RUL'].clip(upper=130,inplace=True)
# model with 2 LSTM layers
model_2LSTM = tf.keras.models.load_model('C:/Users/aalah/3_Feature-clustering-and-XAI-for-RUL-estimation-main/model_FD004_2LSTM')
model_2LSTM.summary()
Model: "sequential_2"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
lstm_4 (LSTM) (None, 30, 128) 74240
dropout_3 (Dropout) (None, 30, 128) 0
lstm_5 (LSTM) (None, 128) 131584
dropout_4 (Dropout) (None, 128) 0
dense_3 (Dense) (None, 1) 129
=================================================================
Total params: 205,953
Trainable params: 205,953
Non-trainable params: 0
_________________________________________________________________
# predict and evaluate
print('Results using model with 2 LSTM layers')
y_hat_test_2 = model_2LSTM.predict(test_array_ab)
print(' ')
print('(FD004) For sensors (Unclipped true RUL): ')
evaluate(y_test, y_hat_test_2)
print(' ')
print('(FD004) For sensors (Clipped true RUL): ')
evaluate(y_true_clipped, y_hat_test_2)
print(' ')
print("(FD004-Test) S-score For sensors (Unclipped true RUL) : ",compute_s_score(y_test, y_hat_test_2))
print(' ')
print("(FD004-Test) MAPE For sensors (Unclipped true RUL) : ",compute_MAPE(y_test, y_hat_test_2))
print(' ')
print("(FD004-Test) S-score For sensors (Clipped true RUL) : ",compute_s_score(y_true_clipped, y_hat_test_2))
print(' ')
print("(FD004-Test) MAPE For sensors (Clipped true RUL) : ",compute_MAPE(y_true_clipped, y_hat_test_2))
Results using model with 2 LSTM layers
 
(FD004) For sensors (Unclipped true RUL): 
test set RMSE:27.456393243378926, R2:0.7464200868799908
 
(FD004) For sensors (Clipped true RUL): 
test set RMSE:17.64173594399212, R2:0.8421256765639393
 
(FD004-Test) S-score For sensors (Unclipped true RUL) :  5532.161040448156
 
(FD004-Test) MAPE For sensors (Unclipped true RUL) :  RUL    26.760547
dtype: float64
 
(FD004-Test) S-score For sensors (Clipped true RUL) :  1823.0533456162002
 
(FD004-Test) MAPE For sensors (Clipped true RUL) :  RUL    22.712508
dtype: float64
# model with 3 LSTM layers
model_3LSTM = tf.keras.models.load_model('C:/Users/aalah/3_Feature-clustering-and-XAI-for-RUL-estimation-main/model_FD004_3LSTM_layer/')
model_3LSTM.summary()
Model: "sequential_3"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
lstm_8 (LSTM) (None, 30, 128) 74240
dropout_8 (Dropout) (None, 30, 128) 0
lstm_9 (LSTM) (None, 30, 128) 131584
dropout_9 (Dropout) (None, 30, 128) 0
lstm_10 (LSTM) (None, 128) 131584
dropout_10 (Dropout) (None, 128) 0
dense_3 (Dense) (None, 1) 129
=================================================================
Total params: 337,537
Trainable params: 337,537
Non-trainable params: 0
_________________________________________________________________
print('Results using model with 3 LSTM layers')
# predict and evaluate
y_hat_test_3 = model_3LSTM.predict(test_array_ab)
print(' ')
print('(FD004) For sensors (Unclipped true RUL): ')
evaluate(y_test, y_hat_test_3)
print(' ')
print('(FD004) For sensors (Clipped true RUL): ')
evaluate(y_true_clipped, y_hat_test_3)
print(' ')
print("(FD004-Test) S-score For sensors (Unclipped true RUL) : ",compute_s_score(y_test, y_hat_test_3))
print(' ')
print("(FD004-Test) MAPE For sensors (Unclipped true RUL) : ",compute_MAPE(y_test, y_hat_test_3))
print(' ')
print("(FD004-Test) S-score For sensors (Clipped true RUL) : ",compute_s_score(y_true_clipped, y_hat_test_3))
print(' ')
print("(FD004-Test) MAPE For sensors (Clipped true RUL) : ",compute_MAPE(y_true_clipped, y_hat_test_3))
Results using model with 3 LSTM layers
 
(FD004) For sensors (Unclipped true RUL): 
test set RMSE:28.148373727412963, R2:0.7334771227616946
 
(FD004) For sensors (Clipped true RUL): 
test set RMSE:18.835609348712477, R2:0.8200349312039035
 
(FD004-Test) S-score For sensors (Unclipped true RUL) :  6553.3228478474075
 
(FD004-Test) MAPE For sensors (Unclipped true RUL) :  RUL    32.090687
dtype: float64
 
(FD004-Test) S-score For sensors (Clipped true RUL) :  3063.8277706144836
 
(FD004-Test) MAPE For sensors (Clipped true RUL) :  RUL    28.018711
dtype: float64
def create_lstm_2layers(input_shape):
history = tf.keras.callbacks.History()
model = Sequential()
model.add(LSTM(units=128, activation='tanh',return_sequences=True,input_shape=(sequence_length, len(chosen_sensors))))
model.add(Dropout(0.1))
model.add(LSTM(units=128, activation='tanh'))
model.add(Dropout(0.1))
model.add(Dense(1,activation='relu'))
model.compile(loss='mse', metrics=['mse'], optimizer=tf.keras.optimizers.Adam(learning_rate=0.001))  # 'lr' is deprecated; use 'learning_rate'
return model
def create_lstm_3layers(input_shape):
history = tf.keras.callbacks.History()
model2 = Sequential()
model2.add(LSTM(units=128, activation='tanh',return_sequences=True,input_shape=(sequence_length, len(chosen_sensors))))
model2.add(Dropout(0.1))
model2.add(LSTM(units=128, activation='tanh',return_sequences=True))
model2.add(Dropout(0.1))
model2.add(LSTM(units=128, activation='tanh'))
model2.add(Dropout(0.1))
model2.add(Dense(1,activation='relu'))
model2.compile(loss='mse', metrics=['mse'], optimizer=tf.keras.optimizers.Adam(learning_rate=0.001))  # 'lr' is deprecated; use 'learning_rate'
return model2
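# A hedged training sketch for these deeper architectures (epochs and batch size are
# placeholders, not the tuned values; assumes train_array_ab and label_array_ab from the cells above):
'''
model2 = create_lstm_3layers(input_shape=(sequence_length, len(chosen_sensors)))
history = model2.fit(train_array_ab, label_array_ab, validation_split=0.2, epochs=10, batch_size=128)
plot_loss(history)
'''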
from os import path
#outpath = "C:/Users/aalah/Downloads/результати/LSTM"
for unit in df_PCA_train_4['Unit'].unique():
engine4 = df_PCA_train_4[df_PCA_train_4['Unit']==unit]
engine1 = train_7[train_7['Unit']==unit]
engine_all = train_smooth[train_smooth['Unit']==unit]
engine_array4 = gen_data_sequences_train(engine4, sequence_length,columns_4)
engine_label_array4 = gen_rul_sequences_train(engine4, sequence_length, ['RUL'])
y_hat_engine4 = model_4.predict(engine_array4)
engine_array1 = gen_data_sequences_train(engine1, sequence_length,remaining_sensors_7)
engine_label_array1 = gen_rul_sequences_train(engine1, sequence_length, ['RUL'])
y_hat_engine1 = loaded_model_lstm_7.predict(engine_array1)
engine_array_all = gen_data_sequences_train(engine_all, sequence_length,chosen_sensors)
engine_label_array_all = gen_rul_sequences_train(engine_all, sequence_length, ['RUL'])
y_hat_engine_all = model_2LSTM.predict(engine_array_all)
# plot the comparative RUL curves for the current engine of the train dataset
plt.figure(figsize=(10,5))
plt.plot(y_hat_engine4, color='darkorange', label='Predicted RUL (4PC+LSTM)')
plt.plot(engine_label_array4, color='blue', label='True RUL')
plt.plot(y_hat_engine1, color='red', label='Predicted RUL (7ClustOfVar+LSTM(1))')
plt.plot(y_hat_engine_all, color='turquoise', label='Predicted RUL (LSTM(2))')
plt.ylabel("RUL")
plt.xlabel("Cycle")
plt.legend(loc='lower left')
plt.title('[FD004 : Comparative study] Engine %d' %unit)
#plt.savefig('C:/Users/aalah/Downloads/comparative/Unit_%d.jpeg'%unit)
#plt.show()